##### BRFSS Analysis #####

###############Data parsing/cleaning################
library(Hmisc)

states<-c(4,23,32,33,35,36,42) #FIPS codes
setwd(<INSERT WD HERE>)

###Pre-implementation
myvars <- c("x.state","x.ststr","x.psu","x.finalwt","imonth","iyear",
            "medcost","age","orace","hispanic","income2","sex","numadult",
            "chld04","chld0512","chld1317")
#1996
mydata<-sasxport.get(unzip("CDBRFS96XPT.zip"))
file.remove("CDBRFS96.XPT") #Save space on disk
data96<-subset(mydata, x.state %in% states, select=myvars)
#1997
mydata<-sasxport.get(unzip("CDBRFS97XPT.zip"))
file.remove("CDBRFS97.XPT")
data97<-subset(mydata, x.state %in% states, select=myvars)
#1998
mydata<-sasxport.get(unzip("CDBRFS98XPT.zip"))
file.remove("CDBRFS98.XPT")
data98<-subset(mydata, x.state %in% states, select=myvars)
#1999
mydata<-sasxport.get(unzip("CDBRFS99XPT.zip"))
file.remove("CDBRFS99.XPT")
data99<-subset(mydata, x.state %in% states, select=myvars)
#2000
mydata<-sasxport.get(unzip("CDBRFS00XPT.zip"))
file.remove("CDBRFS00.XPT")
data00<-subset(mydata, x.state %in% states, select=myvars)

###Post-implementation
myvars2<- c("x.state","x.ststr","x.psu","x.finalwt","imonth","iyear",
            "medcost","age","x.mrace","hispanc2","income2","sex","numadult",
            "children")
#2003
mydata<-sasxport.get(unzip("CDBRFS03XPT.zip"))
file.remove("CDBRFS03.XPT")
data03<-subset(mydata, x.state %in% states, select=myvars2)
#2004
mydata<-sasxport.get(unzip("CDBRFS04XPT.zip"))
file.remove("CDBRFS04.XPT")
data04<-subset(mydata, x.state %in% states, select=myvars2)
#2005
mydata<-sasxport.get(unzip("CDBRFS05XPT.zip"))
file.remove("CDBRFS05.XPT")
data05<-subset(mydata, x.state %in% states, select=myvars2)
#2006
mydata<-sasxport.get(unzip("CDBRFS06XPT.zip"))
file.remove("CDBRFS06.XPT")
data06<-subset(mydata, x.state %in% states, select=myvars2)
#2007
mydata<-sasxport.get(unzip("CDBRFS07XPT.zip"))
file.remove("CDBRFS07.XPT")
data07<-subset(mydata, x.state %in% states, select=myvars2)

rm(mydata)

#Merge pre/post
pre<-rbind(data96,data97,data98,data99,data00)
post<-rbind(data03,data04,data05,data06,data07)

#Clean
preClean<-subset(pre,medcost==1 | medcost==2) #Medcost reported
postClean<-subset(post,medcost==1 | medcost==2)
preClean<-subset(preClean,age>=19 & age<=64) #Ages 19-64 (inclusive)
postClean<-subset(postClean,age>=19 & age<=64)
preClean<-subset(preClean,orace<=5) #Race reported
postClean<-subset(postClean,x.mrace<=7) 
preClean<-subset(preClean,hispanic<=2) #Hispanic Yes/No
postClean<-subset(postClean,hispanc2<=2)
preClean<-subset(preClean,income2<77) #Income reported
postClean<-subset(postClean,income2<77) 
preClean<-subset(preClean,chld04<9) # Num children reported
preClean<-subset(preClean,chld0512<9)
preClean<-subset(preClean,chld1317<9)
postClean<-subset(postClean,children<99)
#Replace 0 children codes with "0"
preClean$chld04[preClean$chld04==8]<-0
preClean$chld0512[preClean$chld0512==8]<-0
preClean$chld1317[preClean$chld1317==8]<-0
postClean$children[postClean$children==88]<-0
#Calculate family size
preClean$family<-preClean$numadult+preClean$chld04+preClean$chld0512+preClean$chld1317
postClean$family<-postClean$numadult+postClean$children

#Create final dataset
#Pre
preFinal<-preClean
preFinal$race<-preFinal$orace 
preFinal$orace<-NULL
preFinal$numadult<-NULL
preFinal$chld04<-NULL
preFinal$chld0512<-NULL
preFinal$chld1317<-NULL
preFinal$postExpansion<-0
#Post
postFinal<-postClean
postFinal$race<-postFinal$x.mrace
#Combine race categories to match orace
postFinal$race[postFinal$race==4]<-3 #Hawaiian/Pacific -> Asian/Pacific
postFinal$race[postFinal$race==5]<-4 #Recode Indian/Alaska
postFinal$race[postFinal$race>5]<-5 #Other/Multi -> Other
postFinal$x.mrace<-NULL
postFinal$hispanic<-postFinal$hispanc2
postFinal$hispanc2<-NULL
postFinal$numadult<-NULL
postFinal$children<-NULL
postFinal$postExpansion<-1

#Merge
final<-rbind(preFinal,postFinal)
#Define expansion states
final$treatmentState<-0
final$treatmentState[final$x.state==4]<-1 #Arizona
final$treatmentState[final$x.state==23]<-1 #Maine
final$treatmentState[final$x.state==36]<-1 #New York
final$medicaidExpansion<-final$postExpansion*final$treatmentState
#Define age groups
final$ageGroup<-0
final$ageGroup<-ifelse(final$age>=19 & final$age<=24,1,final$ageGroup)#19-24
final$ageGroup<-ifelse(final$age>=25 & final$age<=34,2,final$ageGroup)#25-34
final$ageGroup<-ifelse(final$age>=35 & final$age<=44,3,final$ageGroup)#35-44
final$ageGroup<-ifelse(final$age>=45 & final$age<=54,4,final$ageGroup)#45-54
final$ageGroup<-ifelse(final$age>=55 & final$age<=64,4,final$ageGroup)#55-64
#Define race groups
final$raceGroup<-0 #Other (ref.)
final$raceGroup<-ifelse(final$race==1,1,final$raceGroup) #White
final$raceGroup<-ifelse(final$race==2,2,final$raceGroup) #Black
#Recode hispanic No to 0
final$hispanic[final$hispanic==2]<-0
#Recode sex Female to 0
final$sex[final$sex==2]<-0

#Define treatment-control interactions
final$tcInter1<-0
final$tcInter1[final$x.state==4 | final$x.state==35 | final$x.state==32]<-1 #Arizona/New Mexico/Nevada
final$tcInter1=final$tcInter1*as.numeric(final$iyear)
final$tcInter2<-0
final$tcInter2[final$x.state==23 | final$x.state==33]<-1 #Maine/New Hampshire
final$tcInter2=final$tcInter2*as.numeric(final$iyear)
final$tcInter3<-0
final$tcInter3[final$x.state==36 | final$x.state==42]<-1 #New York/Penn
final$tcInter3=final$tcInter3*as.numeric(final$iyear)
#Recode direction of medcost
final$medcost[final$medcost==2]<-0
#Define subgroups
final$white<-0
final$white[final$race==1 & final$hispanic==2]<-1 #Non-hispanic white
final$nonwhite<-0
final$nonwhite[final$white==0]<-1 #Non-white
final$older<-0
final$older[final$age>34]<-1
final$younger<-0
final$younger[final$older==0]<-1
final$poor<-0
final$poor[final$income2<6]<-1
final$nonpoor<-0
final$nonpoor[final$poor==0]<-1

#Save dataset
save(final,file="BRFSS_Clean.RData")
##########################################################################################

### Fig S3 Unadjusted Rates of Delaying Medical Care Due to Cost Among Adults (19-64) ##########################
weighted.mean(final$medcost[final$postExpansion==0 & final$treatmentState==1],
              final$x.finalwt[final$postExpansion==0 & final$treatmentState==1]) #Expansion Pre-Expansion
weighted.mean(final$medcost[final$postExpansion==1 & final$treatmentState==1],
              final$x.finalwt[final$postExpansion==1 & final$treatmentState==1]) #Expansion Post-Expansion
weighted.mean(final$medcost[final$postExpansion==0 & final$treatmentState==0],
              final$x.finalwt[final$postExpansion==0 & final$treatmentState==0]) #Control Pre-Expansion
weighted.mean(final$medcost[final$postExpansion==1 & final$treatmentState==0],
              final$x.finalwt[final$postExpansion==1 & final$treatmentState==0]) #Control Post-Expansion
###############################################################################################################


###Perform regression analysis for Table 3 ####################################################
#Define survey design
library(survey)
final$x.ststr[final$x.ststr==23091]<-23092 #Recode strata with only one PSU
final$x.ststr[final$x.ststr==32042]<-32041
design<-svydesign(id=~x.psu, strata=~x.ststr, weight=~x.finalwt, data=final, nest=TRUE)

#Overall
sfit<-svyglm(medcost ~ medicaidExpansion + 
              factor(raceGroup)+sex+hispanic+factor(income2)+factor(family)+factor(ageGroup)
             +factor(iyear)+factor(x.state)
             +factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
             , design=design)
summary(sfit)

#Subgroups

#White non-Latino (include dummy for nonwhite so we can pull SE from orig coeff)
sfitW<-svyglm(medcost ~ medicaidExpansion + 
                factor(raceGroup)+sex+hispanic+factor(income2)+factor(family)+factor(ageGroup)
              +factor(iyear)+factor(x.state)
              +factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
              +(medicaidExpansion*nonwhite)
              , design=design)
summary(sfitW)
#Non-white/Latino
sfitW<-svyglm(medcost ~ medicaidExpansion + 
              factor(raceGroup)+sex+hispanic+factor(income2)+factor(family)+factor(ageGroup)
              +factor(iyear)+factor(x.state)
              +factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
              +(medicaidExpansion*white)
              , design=design)
summary(sfitW)



#Age (younger)
sfitA<-svyglm(medcost ~ medicaidExpansion + 
              factor(raceGroup)+sex+hispanic+factor(income2)+factor(family)+factor(ageGroup)
              +factor(iyear)+factor(x.state)
              +factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
              +(medicaidExpansion*older)
              , design=design)
summary(sfitA)
#Older
sfitA<-svyglm(medcost ~ medicaidExpansion + 
                factor(raceGroup)+sex+hispanic+factor(income2)+factor(family)+factor(ageGroup)
              +factor(iyear)+factor(x.state)
              +factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
              +(medicaidExpansion*younger)
              , design=design)
summary(sfitA)

#Poor
sfitP<-svyglm(medcost ~ medicaidExpansion + 
                factor(raceGroup)+sex+hispanic+factor(income2)+factor(family)+factor(ageGroup)
              +factor(iyear)+factor(x.state)
              +factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
              +(medicaidExpansion*nonpoor)
              , design=design)
summary(sfitP)
#Non-Poor
sfitP<-svyglm(medcost ~ medicaidExpansion + 
                factor(raceGroup)+sex+hispanic+factor(income2)+factor(family)+factor(ageGroup)
              +factor(iyear)+factor(x.state)
              +factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
              +(medicaidExpansion*poor)
              , design=design)
summary(sfitP)




##### CPS Analaysis #####

#######Data parsing/cleaning ##################################
setwd(<INSERT WD HERE>)

states<-c(11,12,21,23,85,86,88)
sTreat<-c(11,21,86)

#####Parse data#####
parseData<-function(fileName,year){
  #Household records
  mydata<-read.fwf(fileName, c(1,5,-33,2,-8,3), col.names=c("hrecord","h-seq","hg-st60","geco"))
  CPShouse<-subset(mydata,hrecord==1) #Limit to household records
  #Family records
  mydata<-read.fwf(fileName, c(1,5,2,-2,2,-25,2), col.names=c("frecord","fh-seq","ffpos","fpersons","povll"))
  CPSfamily<-subset(mydata,frecord==2) #Limit to family records
  #Person records
  if(year<2003){
    mydata<-read.fwf(fileName, 
                   c(1,5,2,-6,2,-3,1,-4,1,-1,2,-15,2,-4,8,-8,8,-395,1,1,1,1,-275,1,-1,1,-6,1,-1,1,-5,1,1,1,-2,1,-5,1,-23,1), 
                   col.names=c("precord","ph-seq","ffpos","a-age","a-sex","a-race","a-reorgn","phf-seq","a-fnlwgt","marsupwt",
                               "mcare","mcaid","champ","hi-yn","hi","dephi","priv","depriv","out","care","caid","oth","othstper","hea")
    )
  }
  else{ #Updated race/ethnicity variables
    mydata<-read.fwf(fileName, 
                   c(1,5,2,-6,2,-3,1,-3,2,-1,1,-16,2,-4,8,-8,8,-395,1,1,1,1,-275,1,-1,1,-6,1,-1,1,-5,1,1,1,-2,1,-5,1,-23,1), 
                    col.names=c("precord","ph-seq","ffpos","a-age","a-sex","prdtrace","pehspnon","phf-seq","a-fnlwgt","marsupwt",
                                "mcare","mcaid","champ","hi-yn","hi","dephi","priv","depriv","out","care","caid","oth","othstper","hea")
    )
  }
  CPSperson<-subset(mydata,precord==3) #Limit to person records
  CPSperson$year<-year #Add year manually
  CPSperson$state<-CPShouse$hg.st60[match(CPSperson$ph.seq,CPShouse$h.seq)] #Impute state from household record
  CPSperson$famsize<-CPSfamily$fpersons[match(interaction(CPSperson$ph.seq,CPSperson$phf.seq),interaction(CPSfamily$fh.seq,CPSfamily$ffpos))] #Impute family size
  CPSperson$povratio<-CPSfamily$povll[match(interaction(CPSperson$ph.seq,CPSperson$phf.seq),interaction(CPSfamily$fh.seq,CPSfamily$ffpos))] #Impute poverty ratio
  
  CPSperson<-subset(CPSperson,state %in% states) #Limit to treatment/control states
  CPSperson$treat<-0 #Assign treatment/control codes
  CPSperson$treat[CPSperson$state %in% sTreat]<-1
  return(CPSperson)
}

###Read in datasets - takes a long time!
cps97<-parseData("cpsmar97.dat.gz",1997)
cps98<-parseData("mar98supp.cps.gz",1998)
cps99<-parseData("mar99supp.cps.gz",1999)
cps00<-parseData("mar00supp.cps.gz",2000)
cps01<-parseData("mar01supp.dat.gz",2001)
cps02<-parseData("mar02supp.dat.gz",2002)
cps03<-parseData("asec2003.pub.gz",2003)
cps04<-parseData("asec2004.pub.gz",2004)
cps05<-parseData("asec2005_pubuse.pub.gz",2005)
cps06<-parseData("asec2006_pubuse.pub.gz",2006)
cps07<-parseData("asec2007_pubuse_tax2.dat.gz",2007)

####Merge 97-02 and 03-07
cps9702<-rbind(cps97,cps98,cps99,cps00,cps01,cps02)
cps0307<-rbind(cps03,cps04,cps05,cps06,cps07)
#Race/ethnicity crosswalks
table(cps9702$a.race,exclude=NULL) #Get freqs
table(cps9702$a.reorgn,exclude=NULL)
table(cps0307$prdtrace,exclude=NULL)
table(cps0307$pehspnon,exclude=NULL)
#Recode to white/non-white and hispanic/non-hispanic
cps9702$white<-0
cps9702$white[cps9702$a.race==1]<-1
cps0307$white<-0
cps0307$white[cps0307$prdtrace==1]<-1
cps9702$hispanic<-0
cps9702$hispanic[cps9702$a.reorgn<8]<-1
cps0307$hispanic<-0
cps0307$hispanic[cps0307$pehspnon==1]<-1
#Race groups: white, black, other
cps9702$raceGroup<-0 #Other (ref.)
cps9702$raceGroup<-ifelse(cps9702$a.race==1,1,cps9702$raceGroup) #White
cps9702$raceGroup<-ifelse(cps9702$a.race==2,2,cps9702$raceGroup) #Black
cps0307$raceGroup<-0 #Other (ref.)
cps0307$raceGroup<-ifelse(cps0307$prdtrace==1,1,cps0307$raceGroup) #White
cps0307$raceGroup<-ifelse(cps0307$prdtrace==2,2,cps0307$raceGroup) #Black
cps9702$a.race<-NULL
cps9702$a.reorgn<-NULL
cps0307$prdtrace<-NULL
cps0307$pehspnon<-NULL
#Merge final dataset
cpsFinal<-rbind(cps9702,cps0307)

#Define expansion period
cpsFinal$postExpansion<-0
cpsFinal$postExpansion[(cpsFinal$state==11 | cpsFinal$state==12) & cpsFinal$year>2002]<-1 #Maine/New Hampshire
cpsFinal$postExpansion[cpsFinal$state>12 & cpsFinal$year>2001]<-1 #Rest of states
#Define reference year (0=year of expansion)
cpsFinal$expYear<-0
cpsFinal$expYear[(cpsFinal$state==11 | cpsFinal$state==12)]<-2002 #Maine/New Hampshire
cpsFinal$expYear[cpsFinal$state>12]<-2001 #Rest of states
cpsFinal$refYear<-(cpsFinal$year-cpsFinal$expYear)

#Restrict to ages 19-64 (inclusive)
cpsFinal<-subset(cpsFinal,a.age>=19 & a.age<=64) #Ages 19-64 (inclusive)
#Create age groups
cpsFinal$age1924<-ifelse(cpsFinal$a.age>=19 & cpsFinal$a.age<=24,1,0)
cpsFinal$age2534<-ifelse(cpsFinal$a.age>=25 & cpsFinal$a.age<=34,1,0)
cpsFinal$age3544<-ifelse(cpsFinal$a.age>=35 & cpsFinal$a.age<=44,1,0)
cpsFinal$age4554<-ifelse(cpsFinal$a.age>=45 & cpsFinal$a.age<=54,1,0)
cpsFinal$age5564<-ifelse(cpsFinal$a.age>=55 & cpsFinal$a.age<=64,1,0)
#Create Male variable
cpsFinal$male<-ifelse(cpsFinal$a.sex==1,1,0)
#Create Income groups
cpsFinal$FPL0_100<-ifelse(cpsFinal$povratio<4,1,0)
cpsFinal$FPL100_200<-ifelse(cpsFinal$povratio>=4 & cpsFinal$povratio<8,1,0)

#Renormalize weights
cpsFinal$wt<-cpsFinal$marsupwt/length(cpsFinal$marsupwt)
#Save dataset
save(cpsFinal,file="CPS_Clean.RData")

###################################################################################

### Table 1 #####################################################################
library(Hmisc)
#Mean Age-Expansion
xm<-wtd.mean(cpsFinal$a.age[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$a.age[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#Mean Age-Control
xm<-wtd.mean(cpsFinal$a.age[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$a.age[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))

#Age group %s
#19-24 Expansion
xm<-wtd.mean(cpsFinal$age1924[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age1924[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#19-24 Control
xm<-wtd.mean(cpsFinal$age1924[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age1924[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))
#25-34 Expansion
xm<-wtd.mean(cpsFinal$age2534[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age2534[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#25-34 Control
xm<-wtd.mean(cpsFinal$age2534[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age2534[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))
#35-44 Expansion
xm<-wtd.mean(cpsFinal$age3544[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age3544[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#35-44 Control
xm<-wtd.mean(cpsFinal$age3544[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age3544[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))
#45-54 Expansion
xm<-wtd.mean(cpsFinal$age4554[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age4554[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#45-54 Control
xm<-wtd.mean(cpsFinal$age4554[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age4554[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))
#55-64 Expansion
xm<-wtd.mean(cpsFinal$age5564[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age5564[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#55-64 Control
xm<-wtd.mean(cpsFinal$age5564[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$age5564[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))

# Male % 
#Expansion
xm<-wtd.mean(cpsFinal$male[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$male[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#Control
xm<-wtd.mean(cpsFinal$male[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$male[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))

#Race or ethnic group %
#Race-Expansion
xm<-wtd.mean(cpsFinal$white[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$white[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#Race-Control
xm<-wtd.mean(cpsFinal$white[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$white[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))
#Hispanic-Expansion
xm<-wtd.mean(cpsFinal$hispanic[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$hispanic[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#Hispanic-Control
xm<-wtd.mean(cpsFinal$hispanic[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$hispanic[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))

#Income
#<100% FPL Expansion
xm<-wtd.mean(cpsFinal$FPL0_100[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$FPL0_100[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#Control
xm<-wtd.mean(cpsFinal$FPL0_100[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$FPL0_100[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))
#100-200% FPL Expansion
xm<-wtd.mean(cpsFinal$FPL100_200[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$FPL100_200[cpsFinal$treat==1 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==1 & cpsFinal$postExpansion==0]))
#Control
xm<-wtd.mean(cpsFinal$FPL100_200[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0])
sd<-sqrt(wtd.var(cpsFinal$FPL100_200[cpsFinal$treat==0 & cpsFinal$postExpansion==0],cpsFinal$wt[cpsFinal$treat==0 & cpsFinal$postExpansion==0]))
##########################################################################################

### Figure 1.B Medicaid Enrollment #######################################################
cpsFinal$mcaid[cpsFinal$mcaid==2]<-0 #Recode mcaid No to 0
cpsFinal$caid[cpsFinal$caid==2]<-0 #Recode caid No to 0

#Calculate Medcaid %
library(data.table)
dt<-as.data.table(cpsFinal)
#mcaid
dt[,list(mcaidp=weighted.mean(mcaid[treat==0],marsupwt[treat==0])),by=refYear] #Control
dt[,list(mcaidp=weighted.mean(mcaid[treat==1],marsupwt[treat==1])),by=refYear] #Treat
#caid
dt[,list(mcaidp=weighted.mean(caid[treat==0],marsupwt[treat==0])),by=refYear] #Control
dt[,list(mcaidp=weighted.mean(caid[treat==1],marsupwt[treat==1])),by=refYear] #Treat
##########################################################################################

### Figure S1 Unadjusted Rates of Uninsurance Among Adults (19-64) ################################
cpsFinal$mcare[cpsFinal$mcare==2]<-0 #Recode no to 0
cpsFinal$champ[cpsFinal$champ==2]<-0 #Recode no to 0
cpsFinal$hi.yn[cpsFinal$hi.yn==2]<-0 #Recode no to 0
cpsFinal$anyHI<-(cpsFinal$mcare+cpsFinal$mcaid+cpsFinal$champ+cpsFinal$hi.yn)
cpsFinal$anyHI<-ifelse(cpsFinal$anyHI>0,1,0) #Calculate no insurance
dt<-as.data.table(cpsFinal)
#Print
dt[,list(anyHIp=weighted.mean(anyHI[treat==0],marsupwt[treat==0])),by=refYear] #Control
dt[,list(anyHIp=weighted.mean(anyHI[treat==1],marsupwt[treat==1])),by=refYear] #Treat
##############################################################################################

### Figure S2 Unadjusted Rates of Excellent/Very Good Health Among Adults (19-64) ################################
cpsFinal$goodHealth<-ifelse(cpsFinal$hea<3,1,0)
dt<-as.data.table(cpsFinal)
#Print
dt[,list(goodHealthp=weighted.mean(goodHealth[treat==1],marsupwt[treat==1])),by=refYear] #Treat
dt[,list(goodHealthp=weighted.mean(goodHealth[treat==0],marsupwt[treat==0])),by=refYear] #Control
#####################################################################################################

######Table 3 ###########################################################################################
#Define treatment-control interactions
cpsFinal$tcInter1<-0
cpsFinal$tcInter1[cpsFinal$state==86 | cpsFinal$state==85 | cpsFinal$state==88]<-1 #Arizona/New Mexico/Nevada
cpsFinal$tcInter1=cpsFinal$tcInter1*as.numeric(cpsFinal$year)
cpsFinal$tcInter2<-0
cpsFinal$tcInter2[cpsFinal$state==11 | cpsFinal$state==12]<-1 #Maine/New Hampshire
cpsFinal$tcInter2=cpsFinal$tcInter2*as.numeric(cpsFinal$year)
cpsFinal$tcInter3<-0
cpsFinal$tcInter3[cpsFinal$state==21 | cpsFinal$state==23]<-1 #New York/Penn
cpsFinal$tcInter3=cpsFinal$tcInter3*as.numeric(cpsFinal$year)

#Get variance covariance matrix under clustered SEs
get_CL_vcov<-function(model, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  
  #calculate degree of freedom adjustment
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  
  #calculate the uj's
  uj  <- apply(estfun(model),2, function(x) tapply(x, cluster, sum))
  
  #use sandwich to get the var-covar matrix
  vcovCL <- dfc*sandwich(model, meat=crossprod(uj)/N)
  return(vcovCL)
}

#Medicaid coverage ####################################
#Overall
fit<-lm(mcaid ~ (treat*postExpansion)+
          factor(raceGroup)+male+hispanic+factor(povratio)+
          age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+
          factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

####Subgroups
cpsFinal$whiteNH<-ifelse(cpsFinal$white==1 & cpsFinal$hispanic==0,1,0)
cpsFinal$nonwhite<-ifelse(cpsFinal$whiteNH==1,0,1)
#White Non-latino
fit<-lm(mcaid ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*nonwhite)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Nonwhite/latino
fit<-lm(mcaid ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*whiteNH)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

cpsFinal$older<-ifelse(cpsFinal$a.age>=35,1,0)
cpsFinal$younger<-ifelse(cpsFinal$older==1,0,1)
#Age (younger)
fit<-lm(mcaid ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*older)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Age (older)
fit<-lm(mcaid ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*younger)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

cpsFinal$poor<-ifelse(cpsFinal$povratio<8,1,0)
cpsFinal$nonpoor<-ifelse(cpsFinal$poor==1,0,1)
#Poor
fit<-lm(mcaid ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*nonpoor)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Non-Poor
fit<-lm(mcaid ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*poor)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

#######################################################

### No Health Insurance################################
fit<-lm((1-anyHI) ~ (treat*postExpansion)+
          factor(raceGroup)+male+hispanic+factor(povratio)+
          age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+
          factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)


#White Non-latino
fit<-lm((1-anyHI) ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*nonwhite)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Non-white/latino
fit<-lm((1-anyHI) ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*whiteNH)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

#Age (younger)
fit<-lm((1-anyHI) ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*older)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Age (older)
fit<-lm((1-anyHI) ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*younger)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

#Poor
fit<-lm((1-anyHI) ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*nonpoor)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Non-Poor
fit<-lm((1-anyHI) ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*poor)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

#######################################################

###Excellent or very good health################################
fit<-lm(goodHealth ~ (treat*postExpansion)+
          factor(raceGroup)+male+hispanic+factor(povratio)+
          age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+
          factor(tcInter1)+factor(tcInter2)+factor(tcInter3)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

#White Non-latino
fit<-lm(goodHealth ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*nonwhite)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#NonWhite/Latino
fit<-lm(goodHealth ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*whiteNH)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)


#Age (younger)
fit<-lm(goodHealth ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*older)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Age (older)
fit<-lm(goodHealth ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*younger)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)

#Poor
fit<-lm(goodHealth ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*nonpoor)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)
#Non-Poor
fit<-lm(goodHealth ~ (treat*postExpansion)+factor(raceGroup)+male+hispanic+factor(povratio)+age2534+age3544+age4554+age5564+
          factor(year)+factor(state)+factor(tcInter1)+factor(tcInter2)+factor(tcInter3)+(treat*postExpansion*poor)
        ,data=cpsFinal,weights=wt)
fit.vcovCL<-get_CL_vcov(fit, cpsFinal$state)
coeftest(fit, fit.vcovCL)


##### State Mortality Analysis #####
setwd(<INSERT WD HERE>)

####Figure 1A ################################################################
state7998<-read.table("State, 1979-1998.txt",header=TRUE)
state9914<-read.table("State, 1999-2014.txt",header=TRUE)
state7914<-rbind(state7998,state9914) #Merge
ageGroups<-c("20-24","25-34","35-44","45-54","55-64") 
state7914<-subset(state7914,Age.Group.Code %in% ageGroups)#Restrict to non-elderly adults
expansion<-c("Arizona","Maine","New York")
state7914$treat<-ifelse(state7914$State %in% expansion,1,0)
#Define reference year (0=year of expansion)
exp2002<-c("Maine","New Hampshire")
state7914$expYear<-ifelse(state7914$State %in% exp2002,2002,2001)
state7914$refYear<-(state7914$Year-state7914$expYear)
#Convert to numeric
state7914$pop<-as.numeric(as.character(state7914$Population))
state7914$death<-as.numeric(as.character(state7914$Deaths))

#Output overall mortality rate by year
#Sums
aggTreat<-matrix(0,27,4) #[Year][Pop,Deaths,Rate per 100,000]
aggControl<-matrix(0,27,4) #[Year][Pop,Deaths]
for(i in 1:27){
  year=i-14
  aggTreat[i,1]=year #Year
  aggTreat[i,2]=sum(state7914$pop[state7914$treat==1 & state7914$refYear==year]) #Population
  aggTreat[i,3]=sum(state7914$death[state7914$treat==1 & state7914$refYear==year]) #Deaths
  aggTreat[i,4]=(aggTreat[i,3]/aggTreat[i,2])*100000
  aggControl[i,1]=year #Year
  aggControl[i,2]=sum(state7914$pop[state7914$treat==0 & state7914$refYear==year]) #Population
  aggControl[i,3]=sum(state7914$death[state7914$treat==0 & state7914$refYear==year]) #Deaths
  aggControl[i,4]=(aggControl[i,3]/aggControl[i,2])*100000
}
#Print
print(c("Year","Treat","Control"))
for(i in 1:27){
  print(c(aggTreat[i,1],aggTreat[i,4],aggControl[i,4]))
}
#Save dataset
save(state7914,file="MortState_Clean.RData")
##############################################################################

### Table 1 Mortality Rate ##################################################
##OVERALL
#Pre-expansion: Years -4 to 0 
popTreat=sum(state7914$pop[state7914$treat==1 & state7914$refYear>=-4 & state7914$refYear<=0])
deathTreat=sum(state7914$death[state7914$treat==1 & state7914$refYear>=-4 & state7914$refYear<=0])
popControl=sum(state7914$pop[state7914$treat==0 & state7914$refYear>=-4 & state7914$refYear<=0])
deathControl=sum(state7914$death[state7914$treat==0 & state7914$refYear>=-4 & state7914$refYear<=0])
print((deathTreat/popTreat)*100000) #Treat
print((deathControl/popControl)*100000) #Control

###By Cause of Death (ICD Chapter)
stateCOD7998<-read.table("State COD, 1979-1998.txt",header=TRUE)
stateCOD9914<-read.table("State COD, 1999-2014.txt",header=TRUE)
stateCOD7914<-rbind(stateCOD7998,stateCOD9914) #Merge
stateCOD7914$external<-ifelse(stateCOD7914$ICD.Chapter=="External causes of morbidity and mortality" | 
                                stateCOD7914$ICD.Chapter=="External causes of injury and poisoning",1,0)
stateCOD7914$treat<-ifelse(stateCOD7914$State %in% expansion,1,0)
#Define reference year (0=year of expansion)
stateCOD7914$expYear<-ifelse(stateCOD7914$State %in% exp2002,2002,2001)
stateCOD7914$refYear<-(stateCOD7914$Year-stateCOD7914$expYear)
#Convert to numeric
stateCOD7914$pop<-as.numeric(as.character(stateCOD7914$Population))
stateCOD7914$death<-as.numeric(as.character(stateCOD7914$Deaths))
#Save dataset
save(stateCOD7914,file="MortStateCOD_Clean.RData")

#Calculate for Table 1
#External
popTreat=sum(stateCOD7914$pop[stateCOD7914$external==1 & stateCOD7914$treat==1 & stateCOD7914$refYear>=-4 & stateCOD7914$refYear<=0]) #Pre-expansion: Years -4 to 0 
deathTreat=sum(stateCOD7914$death[stateCOD7914$external==1 & stateCOD7914$treat==1 & stateCOD7914$refYear>=-4 & stateCOD7914$refYear<=0],na.rm = TRUE)
popControl=sum(stateCOD7914$pop[stateCOD7914$external==1 & stateCOD7914$treat==0 & stateCOD7914$refYear>=-4 & stateCOD7914$refYear<=0])
deathControl=sum(stateCOD7914$death[stateCOD7914$external==1 & stateCOD7914$treat==0 & stateCOD7914$refYear>=-4 & stateCOD7914$refYear<=0],na.rm = TRUE)
print((deathTreat/popTreat)*100000) #Treat
print((deathControl/popControl)*100000) #Control

#Internal - only sum population for one category (use external)
deathTreat=sum(stateCOD7914$death[stateCOD7914$external==0 & stateCOD7914$treat==1 & stateCOD7914$refYear>=-4 & stateCOD7914$refYear<=0],na.rm = TRUE)
deathControl=sum(stateCOD7914$death[stateCOD7914$external==0 & stateCOD7914$treat==0 & stateCOD7914$refYear>=-4 & stateCOD7914$refYear<=0],na.rm = TRUE)
print((deathTreat/popTreat)*100000) #Treat
print((deathControl/popControl)*100000) #Control
############################################################################


##### County-level Mortality Analysis #####################
#####County-level mortality analysis ######

####Get county-level factors#####
saipe.data <- function(start,end){
  num.yrs = end - start
  var.names = c("st","cty","povall.n","povall.n.lb","povall.n.ub","povall.p","povall.p.lb","povall.p.ub",
                "povu18.n","povu18.n.lb","povu18.n.ub","povu18.p","povu18.p.lb","povu18.p.ub",
                "pov517.n","pov517.n.lb","pov517.n.ub","pov517.p","pov517.p.lb","pov517.p.ub",
                "medhhinc","medhhinc,lb","medhhinc.ub","s1","s2","s3","s4","s5","s6","name","st.code","fname")
  saipe=data.frame()
  for(i in 0:num.yrs){
    yr=substr(as.character(start+i),3,4)
    suf=ifelse(start+i>=2004,".txt",".dat")
    in.data <- read.fwf(paste("http://www.census.gov/did/www/saipe/downloads/estmod",yr,"/est",yr,"ALL",suf,sep=""),
                        widths=c(2,4,9,9,9,5,5,5,9,9,9,5,5,5,9,9,9,5,5,5,7,7,7,8,8,8,5,5,5,46,3,23),
                        header=F)
    colnames(in.data)<-var.names
    in.data$year = start+i
    in.data$povall.n <- as.numeric(as.character(in.data$povall.n))
    in.data$povall.p <- as.numeric(as.character(in.data$povall.p))
    in.data$medhhinc <- as.numeric(as.character(in.data$medhhinc))
    in.data <- subset(in.data,cty != 0)[,c("st","cty","povall.n","povall.p","medhhinc","year","name")]
    saipe <- rbind(saipe,in.data)
  }
  return(saipe)
}
saipe <- saipe.data(1997,2007)
saipe <- subset(saipe,st %in% c(4,23,32,33,35,36,42))
lau.data <- function(start,end){
  num.yrs = end - start
  lau=data.frame()
  for(i in 0:num.yrs){
    yr=substr(as.character(start+i),3,4)
    in.data <- read.fwf(paste("http://www.bls.gov/lau/laucnty",yr,".txt",sep=""),
                        width=c(16,5,8,51,6,14,13,12,10), skip=6, n=3217,
                        header=F)
    colnames(in.data)<-c("LAUS.code","st","cty","name.lau","year","n.lf","n.emp","n.unemp","unemp.rate")
    in.data$st<-as.integer(in.data$st)
    in.data$cty<-as.integer(in.data$cty)
    in.data$unemp.rate<-as.numeric(as.character(in.data$unemp.rate))
    #in.data[!is.na(in.data$year),]

    lau <- rbind(lau,in.data[in.data$st != 72,c("st","cty","year","unemp.rate","name.lau")])
  }
  return(lau)
}
lau <- lau.data(1997,2007)
lau <- subset(lau,st %in% c(4,23,32,33,35,36,42))

popest = data.frame()
popest<-rbind(popest,read.csv("http://www.census.gov/popest/data/counties/asrh/2009/files/cc-est2009-alldata-04.csv",header=T))
popest<-rbind(popest,read.csv("http://www.census.gov/popest/data/counties/asrh/2009/files/cc-est2009-alldata-23.csv",header=T))
popest<-rbind(popest,read.csv("http://www.census.gov/popest/data/counties/asrh/2009/files/cc-est2009-alldata-32.csv",header=T))
popest<-rbind(popest,read.csv("http://www.census.gov/popest/data/counties/asrh/2009/files/cc-est2009-alldata-33.csv",header=T))
popest<-rbind(popest,read.csv("http://www.census.gov/popest/data/counties/asrh/2009/files/cc-est2009-alldata-35.csv",header=T))
popest<-rbind(popest,read.csv("http://www.census.gov/popest/data/counties/asrh/2009/files/cc-est2009-alldata-36.csv",header=T))
popest<-rbind(popest,read.csv("http://www.census.gov/popest/data/counties/asrh/2009/files/cc-est2009-alldata-42.csv",header=T))
popest$HISP <- popest$H_MALE+popest$H_FEMALE
popest$year=1998+popest$YEAR
popest <- popest[popest$AGEGRP==0&popest$YEAR>1,c("STATE","COUNTY","year","TOT_POP","HISP")]
popest$pct.hisp <- popest$HISP/popest$TOT_POP
names(popest)<-c("st","cty","year","ctypop","hisp","pct.hisp")

popest90s = data.frame()
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1990.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1991.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1992.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1993.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1994.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1995.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1996.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1997.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1998.txt",header=F))
popest90s<-rbind(popest90s,read.table("http://www.census.gov/popest/data/intercensal/st-co/tables/STCH-Intercensal/STCH-icen1999.txt",header=F))
names(popest90s) <- c("year","stcty","agegrp","sex.race","hisp","pop")
popest90s<-subset(popest90s,floor(stcty/1000) %in% c(4,23,32,33,35,36,42))
popest90s$year = 1900+popest90s$year
popest90s.agg <- aggregate(popest90s$pop,
                     by=list(stcty=popest90s$stcty,
                             hisp=popest90s$hisp,
                             year=popest90s$year)
                     ,sum)
popest90s.nonhisp=subset(popest90s.agg,hisp==1)
popest90s.hisp=subset(popest90s.agg,hisp==2)
popest90s.final <- merge(popest90s.hisp[,-2],popest90s.nonhisp[,-2],by=c("stcty","year"))
popest90s.final$st = floor(popest90s.final$stcty/1000)
popest90s.final$cty = popest90s.final$stcty - floor(popest90s.final$stcty/1000)*1000
names(popest90s.final)=c("stcty","year","hisp","non.hisp","st","cty")
popest90s.final$ctypop=popest90s.final$hisp+popest90s.final$non.hisp
popest90s.final$pct.hisp=popest90s.final$hisp/popest90s.final$ctypop
popest.series <- rbind(popest,popest90s.final[,c("st","cty","year","ctypop","hisp","pct.hisp")])
##########################################

####### Read in county-level mortality and clean ####################
cmf = data.frame()
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Arizona, 1979-1998.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Arizona, 1999-2014.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Maine, 1979-1998.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Maine, 1999-2014.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Nevada, 1979-1998.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Nevada, 1999-2014.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/New Hampshire, 1979-1998.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/New Hampshire, 1999-2014.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/New Mexico, 1979-1998.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/New Mexico, 1999-2014.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/New York, 1979-1998.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/New York, 1999-2014.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Pennsylvania, 1979-1998.txt", header=T, as.is=T))
cmf = rbind(cmf,read.table("~/Dropbox/Replication/Mortality/Pennsylvania, 1999-2014.txt", header=T, as.is=T))

cmf = cmf[cmf$Age.Group.Code %in% c("20-24","25-34","35-44","45-54","55-64"),]
cmf$deaths <- as.numeric(cmf$Deaths)
cmf$pop <- as.numeric(cmf$Population)
cmf$crude.rate <- as.numeric(cmf$Crude.Rate)
cmf$age <- as.factor(cmf$Age.Group)
cmf$sex <- as.factor(cmf$Gender)
cmf$race <- as.factor(ifelse(cmf$Race.Code %in% c("1002-5","2131-1","A-PI"),"Other",cmf$Race))
cmf$year=cmf$Year
cmf$death.impute = as.integer(is.na(cmf$deaths))
cmf.slim <- subset(cmf,!is.na(pop))
cmf.slim$deaths = ifelse(cmf.slim$death.impute==1,1.4,cmf.slim$deaths)

cmf.agg <- aggregate(cmf.slim[,c("deaths","pop","death.impute")],
                       by=list(stcty=cmf.slim$County.Code,
                               age=cmf.slim$age,
                               sex=cmf.slim$sex,
                               race=cmf.slim$race,
                               year=cmf.slim$year)
                       ,sum)
cmf.9707 <- subset(cmf.agg, year %in% 1997:2007)
cmf.9707$st=as.integer(floor(cmf.9707$stcty/1000))
cmf.9707$cty=as.integer(cmf.9707$stcty-floor(cmf.9707$stcty/1000)*1000)
cmf.9707$death.impute = ifelse(cmf.9707$death.impute==2,1.4,cmf.9707$death.impute)

econ <- merge(lau[,-5],saipe[,-7],by=c("st","cty","year"))
econ.demo <- merge(econ,popest.series[popest.series$year %in% 1997:2007,],by=c("st","cty","year"))
assembled <- merge(cmf.9707,econ.demo,by=c("st","cty","year"))

assembled$treat = ifelse(assembled$st %in% c(4,23,36),1,0)
#assembled$time = ifelse(assembled$st %in% c(23,33),assembled$year-2002,assembled$year-2001)
assembled$post = as.integer(ifelse(assembled$st %in% c(23,33),assembled$year>2002,assembled$year>2001))
assembled$mort.rate = 100000*assembled$deaths/assembled$pop
assembled$income = assembled$medhhinc/1000
assembled$race = relevel(assembled$race, ref="Other")
assembled$tc.pairing = as.factor(ifelse(assembled$st %in% c(23,33),1,ifelse(assembled$st %in% c(4,32,35),2,3)))
###########################

########Regression models ###############################
###Crude mortality rate
lm(mort.rate ~ 1, data=assembled[assembled$treat==1&assembled$post==0,], weights=pop)
lm(mort.rate ~ as.factor(race=="White")-1, data=assembled[assembled$treat==1&assembled$post==0,], weights=pop)
lm(mort.rate ~ as.factor(age%in%c("20-24 years","25-34 years"))-1, data=assembled[assembled$treat==1&assembled$post==0,], weights=pop)
lm(mort.rate ~ as.factor(povall.p>=10.0)-1, data=assembled[assembled$treat==1&assembled$post==0,], weights=pop)
lm(mort.rate ~ tc.pairing-1, data=assembled[assembled$treat==1&assembled$post==0,], weights=pop)

####Net change in mortality
model1 <- lm(mort.rate ~ post:treat+age+sex+race+unemp.rate+povall.p+income+pct.hisp+death.impute+
               as.factor(stcty)+as.factor(year)+as.factor(year):tc.pairing, data=assembled, weights=pop)
summary(model1)

coef(model1)['post:treat']

####Subgroups
model2 <- lm(mort.rate ~ as.factor(race=="White"):post:treat+age+sex+race+unemp.rate+povall.p+income+pct.hisp+death.impute+
               as.factor(stcty)+as.factor(year)+as.factor(year):tc.pairing, data=assembled, weights=pop)
coef(model2)[c('as.factor(race == "White")FALSE:post:treat','as.factor(race == "White")TRUE:post:treat')]

model3 <- lm(mort.rate ~ as.factor(age%in%c("20-24 years","25-34 years")):post:treat+age+sex+race+unemp.rate+povall.p+income+pct.hisp+death.impute+
               as.factor(stcty)+as.factor(year)+as.factor(year):tc.pairing, data=assembled, weights=pop)
coef(model3)[c('as.factor(age %in% c("20-24 years", "25-34 years"))FALSE:post:treat','as.factor(age %in% c("20-24 years", "25-34 years"))TRUE:post:treat')]

model4 <- lm(mort.rate ~ as.factor(povall.p>=10.0):post:treat+age+sex+race+unemp.rate+povall.p+income+pct.hisp+death.impute+
               as.factor(stcty)+as.factor(year)+as.factor(year):tc.pairing, data=assembled, weights=pop)
coef(model4)[c('as.factor(povall.p >= 10)FALSE:post:treat','as.factor(povall.p >= 10)TRUE:post:treat')]

model5 <- lm(mort.rate ~ post:treat+age+sex+race+unemp.rate+povall.p+income+pct.hisp+death.impute+
               as.factor(stcty)+as.factor(year), data=assembled[assembled$tc.pairing=='1',], weights=pop)
coef(model5)['post:treat']
model6 <- lm(mort.rate ~ post:treat+age+sex+race+unemp.rate+povall.p+income+pct.hisp+death.impute+
               as.factor(stcty)+as.factor(year), data=assembled[assembled$tc.pairing=='2',], weights=pop)
coef(model6)['post:treat']
model7 <- lm(mort.rate ~ post:treat+age+sex+race+unemp.rate+povall.p+income+pct.hisp+death.impute+
               as.factor(stcty)+as.factor(year), data=assembled[assembled$tc.pairing=='3',], weights=pop)
coef(model7)['post:treat']

setwd('~/Dropbox/Replication/AnalyticDatasets')
save(cmf,file="cmf_file.Rdata")
save(assembled,file="combined_county_data.Rdata")